*This is a STATA Do-file to replicate the standard deviations of Table 5 in Feenstra-Li-Yu (2014, ReSTATA)            *
*The code is by Robert Feenstra (UC-Davis), Zhiyuan Li(SHUFE), and Miaojie Yu (Peking Univ.)                                                                        *
*To run the code, please put all datasets in the same directoray in your PC                                                                                        *
*After getting the standard deviations, we store it in a separate spreadsheet called "bootstrapped t-value" to manually calculate the t-values reported in the text*


********** STATA SETUP
clear
drop _all
set memory 31g
set matsize 11000
set more off

capture log close   
log using Table5_bootstrap.out, text replace



****generate blank stack to save outcomes********
****column (1)***
g beta1f=0
g beta2f=0
g beta3f=0
g beta4f=0
g beta5f=0
g beta6f=0
g beta7f=0
g beta8f=0
g beta9f=0

g se1f=0
g se2f=0
g se3f=0
g se4f=0
g se5f=0
g se6f=0
g se7f=0
g se8f=0
g se9f=0


****column (2)***
g beta1t=0
g beta2t=0
g beta3t=0
g beta4t=0
g beta5t=0
g beta6t=0
g beta7t=0
g beta8t=0
g beta9t=0

g se1t=0
g se2t=0
g se3t=0
g se4t=0
g se5t=0
g se6t=0
g se7t=0
g se8t=0
g se9t=0

****column (3)***
g beta1u=0
g beta2u=0
g beta3u=0
g beta4u=0
g beta5u=0
g beta6u=0
g beta7u=0
g beta8u=0
g beta9u=0

g se1u=0
g se2u=0
g se3u=0
g se4u=0
g se5u=0
g se6u=0
g se7u=0
g se8u=0
g se9u=0

g etam=0
g etam10=0
g etam25=0
g g1=0
g g1_10=0
g g1_25=0
save  Table5_Stack, replace


************
* This program runs the panel bootstrapping by randomly drawing firms.
* There are in fact 5 steps to obtain the standard deviations in Table 5 in the following loop: 
*(1) The pliminary regression of firm TFP (called tfpop) on firm-level indicators, 4-digit industry indicators and ex-ante TFP (called tfpop2), and interactions between 2-digit industry indicators and other variables that appead on the right of Equ. (30) in the text (skipped here)
*(2) The selectin equation (30) in the text using fitted TFP
*(3) The second-step Heckman Equation excluding fitted TFP, used to obtain predicted export share
*(4) The  first-step of the 2SLS estimates, see footnote 26 in the text for details
*(5) The second-step of the 2SLS estimates, see footnote 26 in the text for details

***********************************
forvalues i=1/100 {

use Table5

****bootstrap*********
bsample, cluster(newid)
disp "iteration # `i'"
**********************


***Step (1): Prelim Fit which is already done in previous step and its outcome variable, predicted TFP (tfpp) is already included in the current dataset**
***Step (2): The Selection Equation with fitted TFP (tfpp)*******************


xi: qui probit FX tfpp   tang_percent  tang_dummy klratio* dyear2-dyear9 i.cic_adj
predict XI

predict PROBITXB, xb
gen PDFPROBIT=(1/sqrt(2*_pi))*exp(-(PROBITXB^2/2))
gen CDFPROBIT = normprob(PROBITXB)
gen IMR_klratio = PDFPROBIT/CDFPROBIT      /* gets the inverse mills ratio*/
su IMR_klratio

****Step (3): the second-step Heckman Equation excluding fitted TFP, used to obtain predicted export share, shown as Column (4) of Table 3********************

xi: qui reg expint  tang_percent tang_dummy klratio* IMR_klratio dyear2-dyear9 i.cic_adj
predict expint_p




g diff=expint-expint_p
su diff if FX==1, d
g diffvar=r(Var)  
su diffvar

su expint if FX==1,d   
g etamean=r(mean)
su etamean

g expintpsq=(expint_p)^2
g expintsqp=expintpsq/XI*(1+diffvar/(etamean)^2)
su expintsqp expintpsq

g expintsqp_int=expintsqp*int_usd

g expintsqp_int_sea   =expintsqp_int*sea
g expintsqp_int_nosea =expintsqp_int*nosea



*****Generate variables for main estimates*********************
 g       iv            =exp(tfpop2)
 g       iv_expint     =iv*expint
 g       iv_expintsq   =iv*expintsq 
 
 g       iv_expintp    =iv*expint_p
 g       iv_expintpsq  =iv*(expint_p)^2  
 
 g iv_expintp_sea    =iv_expintp*sea
 g iv_expintp_nosea  =iv_expintp*nosea



g iv_expintsqp =iv*(expintsqp)  
g iv_expintsqp_sea=iv_expintsqp*sea
g iv_expintsqp_nosea=iv_expintsqp*nosea
 
g expintsqp_tang =expintsqp*tang_percent
count

g expintp_int    =expint_p*int_usd
g expintp_tang   =expint_p*tang_percent
g expintp_int_sea  =expintp_int*sea
g expintp_int_nosea=expintp_int*nosea
g expintp_sea      =expint_p*sea
g expintp_nosea      =expint_p*nosea


local i 13
while `i'<=37 { 
qui su tfpop2  if cic2d==`i' 
g tiv`i'=r(Var)  
replace tiv`i'=0 if cic2d!=`i'
local i=`i'+1
}

local i 39
while `i'<=42 { 
qui su tfpop2 if cic2d==`i' 
g tiv`i'=r(Var)  
replace tiv`i'=0 if cic2d!=`i'
local i=`i'+1
}

g tiv=tiv13+tiv14+tiv15+tiv16+tiv17+tiv18+tiv19+tiv20+tiv21+tiv22+tiv23+tiv24+tiv25+tiv26+tiv27+tiv28+tiv29+tiv30+tiv31+tiv32+tiv33+tiv34+tiv35+tiv36+tiv37+tiv39+tiv40+tiv41+tiv42  
su tiv, d





***************

drop if year>2006
codebook tiv
drop if cic2d==.

****Table 5, >0%********
su expint if FX==1
g etam=r(mean)
xi: qui ivreg2 rev_usd  (int_usd  expintp_int expintsqp_int =iv  iv_expintp iv_expintsqp) expint_p   FX  tang_percent expintp_tang expintsqp_tang tang_dummy i.cic2d    dyear* , robust 

g beta1f=_b[int_usd]
g beta2f=_b[expintp_int]
g beta3f=_b[expintsqp_int]
g beta4f=_b[expint_p]
g beta5f=_b[FX]
g beta6f=_b[tang_percent]
g beta7f=_b[expintp_tang]
g beta8f=_b[expintsqp_tang]
g beta9f=_b[tang_dummy]

g se1f=_se[int_usd]
g se2f=_se[expintp_int]
g se3f=_se[expintsqp_int]
g se4f=_se[expint_p]
g se5f=_se[FX]
g se6f=_se[tang_percent]
g se7f=_se[expintp_tang]
g se8f=_se[expintsqp_tang]
g se9f=_se[tang_dummy]

g g1=-(beta2f)^2/beta3f*(1/(1-beta2f/(beta3f*etam)))





****Table 5, >10%********
keep if tiv>=.56678 
su expint if FX==1
g etam10=r(mean)
xi: qui ivreg2 rev_usd  (int_usd  expintp_int expintsqp_int =iv  iv_expintp iv_expintsqp) expint_p   FX  tang_percent expintp_tang expintsqp_tang tang_dummy i.cic2d    dyear* , robust 

g beta1t=_b[int_usd]
g beta2t=_b[expintp_int]
g beta3t=_b[expintsqp_int]
g beta4t=_b[expint_p]
g beta5t=_b[FX]
g beta6t=_b[tang_percent]
g beta7t=_b[expintp_tang]
g beta8t=_b[expintsqp_tang]
g beta9t=_b[tang_dummy]

g se1t=_se[int_usd]
g se2t=_se[expintp_int]
g se3t=_se[expintsqp_int]
g se4t=_se[expint_p]
g se5t=_se[FX]
g se6t=_se[tang_percent]
g se7t=_se[expintp_tang]
g se8t=_se[expintsqp_tang]
g se9t=_se[tang_dummy]

g g1_10=-(beta2t)^2/beta3t*(1/(1-beta2t/(beta3t*etam10)))




****Table 5, >25%********
keep if tiv>= .670989
su expint if FX==1
g etam25=r(mean)
xi: qui ivreg2 rev_usd  (int_usd  expintp_int expintsqp_int =iv  iv_expintp iv_expintsqp) expint_p   FX  tang_percent expintp_tang expintsqp_tang tang_dummy i.cic2d    dyear* , robust 
g beta1u=_b[int_usd]
g beta2u=_b[expintp_int]
g beta3u=_b[expintsqp_int]
g beta4u=_b[expint_p]
g beta5u=_b[FX]
g beta6u=_b[tang_percent]
g beta7u=_b[expintp_tang]
g beta8u=_b[expintsqp_tang]
g beta9u=_b[tang_dummy]

g se1u=_se[int_usd]
g se2u=_se[expintp_int]
g se3u=_se[expintsqp_int]
g se4u=_se[expint_p]
g se5u=_se[FX]
g se6u=_se[tang_percent]
g se7u=_se[expintp_tang]
g se8u=_se[expintsqp_tang]
g se9u=_se[tang_dummy]

g g1_25=-(beta2t)^2/beta3t*(1/(1-beta2t/(beta3t*etam25)))


duplicates drop beta1f, force
keep beta* se* g1*
  append using Table5_Stack
save Table5_Stack, replace
}
drop sea
su se* beta*


********** CLOSE OUTPUT
drop _all
log close